This document explores the mango data from SIAP - Mexico.

- Data has been summarized at the state level.
- 26 states report mango production during the period 1980 - 2016.
- There are 962 observations in total (26 states x 37 years), one observation per year for each state.

## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.0     ✔ purrr   0.3.2
## ✔ tibble  2.1.3     ✔ dplyr   1.0.2
## ✔ tidyr   0.8.3     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## Warning: package 'dplyr' was built under R version 3.6.2
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
# Load the SIAP crop records from CSV.
# The space in the folder name is written literally: R string literals have no
# "\ " escape, so a backslash-escaped space is rejected by the parser.
SIAP <- read.csv("/Users/erikaluna/R Studio/msc_thesis/SIAP.csv")
# States included in the irrigated vs. rainfed comparison.
study_states <- c(
  "baja california", "baja california sur", "chihuahua", "durango",
  "nayarit", "sinaloa", "sonora"
)

# Mango rows for those states, reduced to the columns used below. Each subset
# that follows is a slice of this table by year range and water mode, so the
# state list and column selection are written only once.
mango_study <- SIAP %>%
  filter(state %in% study_states, crop == "mango") %>%
  select(year, state, water, yield, losses)

# The plots built from these subsets title the pre-2002 rows "State level"
# and the rows from 2002 onwards "Municipal level".
mango_irri_state <- mango_study %>%
  filter(year < 2002, water == "irrigated")
#mango_irri_state %>% 
  #DT::datatable()

mango_irri_mun <- mango_study %>%
  filter(year > 2001, water == "irrigated")
mango_irri_mun %>%
  DT::datatable()

mango_rain_state <- mango_study %>%
  filter(year < 2002, water == "rainfed")
#mango_rain_state %>% 
  #DT::datatable()

mango_rain_mun <- mango_study %>%
  filter(year > 2001, water == "rainfed")
mango_rain_mun %>%
  DT::datatable()

# Stack irrigated and rainfed rows so `water` can be used as the plot axis.
mango_water_state <- bind_rows(mango_irri_state, mango_rain_state)
mango_water_mun <- bind_rows(mango_irri_mun, mango_rain_mun)
# Box plot of yield by water mode for the pre-2002 subset.
mango_water_state %>%
  ggplot(aes(water, yield)) +
  geom_boxplot() +
  # The axis name is set once, here. The previous label "Yield (ha)" named an
  # area unit; "tonnes/ha" matches the other yield plots in this document.
  # NOTE(review): confirm the unit of SIAP's `yield` column.
  # On the log10 axis, zero yields become infinite and are dropped with a
  # warning.
  scale_y_continuous("Yield (tonnes/ha)", trans = "log10") +
  xlab("Water mode") +
  ggtitle("Mango State level")
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).

  #scale_y_continuous(labels = comma) +
  #theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 0.5))
# Box plot of yield by water mode for the subset from 2002 onwards.
mango_water_mun %>%
  ggplot(aes(water, yield)) +
  geom_boxplot() +
  # The axis name is set once, here. The previous label "Yield (ha)" named an
  # area unit; "tonnes/ha" matches the other yield plots in this document.
  # NOTE(review): confirm the unit of SIAP's `yield` column.
  # On the log10 axis, zero yields become infinite and are dropped with a
  # warning.
  scale_y_continuous("Yield (tonnes/ha)", trans = "log10") +
  xlab("Water mode") +
  ggtitle("Mango Municipal level")
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 113 rows containing non-finite values (stat_boxplot).

  #scale_y_continuous(labels = comma) +
  #theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust= 0.5))

Mango

# Collapse the mango records to one row per (year, state).
one_crop <- summarise(
  group_by(filter(SIAP, crop == "mango"), year, state),
  # Aggregate yield: total production over total harvested area, 2 decimals.
  ag_yield = round(sum(production) / sum(harvested), 2),
  ag_prod = sum(production),
  ag_planted = sum(planted),
  ag_harv = sum(harvested),
  ag_losses = sum(losses)
)
## `summarise()` regrouping output by 'year' (override with `.groups` argument)

Mango national

# Yearly totals over every mango record, with no state restriction.
#filter(state %in% c("baja california sur", "durango", "nayarit", "sinaloa", "sonora"), crop == "mango") %>% 
mango_nat <- summarise(
  group_by(filter(SIAP, crop == "mango"), year),
  # Aggregate yield: total production over total harvested area, 2 decimals.
  ag_yield = round(sum(production) / sum(harvested), 2),
  ag_prod = sum(production),
  ag_planted = sum(planted),
  ag_harv = sum(harvested),
  ag_losses = sum(losses)
)
## `summarise()` ungrouping output (override with `.groups` argument)
# Show the yearly table as an interactive widget.
DT::datatable(mango_nat)

Number of observations

## `summarise()` ungrouping output (override with `.groups` argument)

Panel data

A data frame with all years and all states that grow mango.

# Balanced panel skeleton: one row for every (state, year) pair.
# The repeat counts come from the vectors themselves, so editing the year range
# or the state list cannot leave the two columns with mismatched lengths.
panel_years <- 1980:2016
panel_states <- c(
  "baja california", "baja california sur", "campeche",
  "chiapas", "colima", "durango",
  "guanajuato", "guerrero", "hidalgo", "jalisco",
  "mexico", "michoacan", "morelos", "nayarit", "oaxaca",
  "puebla", "queretaro", "quintana roo",
  "san luis potosi", "sinaloa", "sonora", "tabasco",
  "tamaulipas", "veracruz", "yucatan", "zacatecas"
) # states that report mango production

# The whole year range, repeated once per state.
period <- data.frame(year = rep(panel_years, times = length(panel_states)))

# Each state repeated over consecutive rows, one per year, so row k of `states`
# lines up with row k of `period` without any sorting step.
# stringsAsFactors is set explicitly because R < 4.0 defaults to factors.
states <- data.frame(
  state = rep(panel_states, each = length(panel_years)),
  stringsAsFactors = FALSE
)

states_period <- cbind(states, period)

Data frame for Mango

# Attach the per-year state aggregates to the panel skeleton. Combinations
# with no match in `one_crop` keep their row with NA values.
mango <- states_period %>%
  left_join(one_crop, by = c("state", "year")) %>%
  mutate(
    i = as.numeric(factor(state)), # numeric code of the state factor level
    t = as.numeric(factor(year))   # numeric code of the year factor level
  ) %>%
  # NOTE(review): the result stays grouped by year; arrange() does not sort
  # within groups by default, so rows are ordered by state overall.
  group_by(year) %>%
  arrange(state)

DT::datatable(mango)

Plots

Production

# Per-state descriptive statistics of ag_prod, ignoring missing years.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
mango %>%
  group_by(state) %>%
  summarise(
    max_prod = max(ag_prod, na.rm = TRUE),
    min_prod = min(ag_prod, na.rm = TRUE),
    range_prod = max(ag_prod, na.rm = TRUE) - min(ag_prod, na.rm = TRUE),
    sd_prod = sd(ag_prod, na.rm = TRUE),
    mean_prod = mean(ag_prod, na.rm = TRUE),
    median_prod = median(ag_prod, na.rm = TRUE)
  ) %>%
  knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
state max_prod min_prod range_prod sd_prod mean_prod median_prod
baja california 0.00 0.00 0.00 NA 0.0000 0.000
baja california sur 9913.65 578.00 9335.65 2230.2162 3563.9111 2789.800
campeche 42933.70 3930.00 39003.70 11796.7935 23410.3406 26213.720
chiapas 238429.55 37760.00 200669.55 63980.3042 123791.2263 131164.700
colima 91294.00 9623.00 81671.00 19869.9166 48479.2611 49431.560
durango 1576.42 65.00 1511.42 492.2448 818.9611 720.000
guanajuato 405.00 0.00 405.00 143.2752 205.7000 211.000
guerrero 372282.78 36575.00 335707.78 93659.3813 215997.6649 189171.000
hidalgo 2568.00 0.00 2568.00 534.0370 701.3468 588.965
jalisco 113607.55 27022.00 86585.55 14033.8712 51651.2451 48642.000
mexico 9620.00 1545.00 8075.00 1583.3664 4304.7063 4499.750
michoacan 144675.07 19434.00 125241.07 36260.2297 95737.3186 109750.000
morelos 10726.00 5079.99 5646.01 1327.1275 7905.7977 8055.000
nayarit 364814.00 52362.00 312452.00 81069.9913 186393.2317 181319.350
oaxaca 275120.00 73573.00 201547.00 33085.2491 174068.0546 177690.000
puebla 2507.00 122.70 2384.30 603.4554 1059.7005 920.000
queretaro 1414.00 102.00 1312.00 297.5532 522.3822 460.000
quintana roo 230.00 20.00 210.00 63.8367 114.8667 102.000
san luis potosi 19429.00 775.00 18654.00 3118.7475 4240.9571 4008.000
sinaloa 339530.40 11897.00 327633.40 79157.4889 158816.6494 158796.000
sonora 3916.00 0.00 3916.00 676.7596 539.5373 349.500
tabasco 9348.00 980.30 8367.70 2557.4636 3826.4303 3152.500
tamaulipas 12984.98 0.00 12984.98 3391.8427 6345.0753 6737.000
veracruz 311128.00 87844.80 223283.20 65246.1948 189157.0960 182775.000
yucatan 16149.00 1113.40 15035.60 2807.8291 5158.2930 5086.000
zacatecas 1419.00 21.00 1398.00 298.1457 334.0278 169.500
# One box per state showing the spread of ag_prod across years.
ggplot(mango, aes(x = state, y = ag_prod)) +
  geom_boxplot() +
  labs(x = "State", y = "Production (tonnes)") +
  #scale_y_continuous(labels = comma) +
  # Vertical state names so the labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5))
## Warning: Removed 127 rows containing non-finite values (stat_boxplot).

# Number of years with a non-missing ag_prod value, per state.
number_obs <- summarise(
  group_by(mango, state),
  obs = sum(!is.na(ag_prod))
)
## `summarise()` ungrouping output (override with `.groups` argument)
# States with more than 34 non-missing years.
mango_complete <- filter(number_obs, obs > 34)
mango_complete
## # A tibble: 20 x 2
##    state                 obs
##    <chr>               <int>
##  1 baja california sur    35
##  2 campeche               35
##  3 chiapas                35
##  4 colima                 35
##  5 durango                35
##  6 guerrero               35
##  7 jalisco                35
##  8 mexico                 35
##  9 michoacan              35
## 10 morelos                35
## 11 nayarit                35
## 12 oaxaca                 35
## 13 puebla                 37
## 14 queretaro              37
## 15 san luis potosi        35
## 16 sinaloa                35
## 17 sonora                 37
## 18 veracruz               35
## 19 yucatan                37
## 20 zacatecas              36
# Production over time, one facet per state with its own y scale.
mango_ts <- ggplot(mango, aes(x = year, y = ag_prod)) +
  geom_line() +
  # Red frame around the whole panel of every state in mango_complete.
  geom_rect(
    data = subset(mango, state %in% c(mango_complete$state)),
    fill = NA, colour = "red",
    xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf
  ) +
  facet_wrap(~state, scales = "free_y", ncol = 5) +
  #facet_wrap(~state, ncol=5)
  labs(
    title = "Mango Production 1980 - 2016",
    x = "Years",
    y = "Production (tonnes)"
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1))
mango_ts
## Warning: Removed 5 rows containing missing values (geom_path).

Yield

# Per-state descriptive statistics of ag_yield, ignoring missing years.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
# A state with no non-missing yield has nothing left after na.rm, so max()
# and min() return -Inf / Inf with a warning.
mango %>%
  group_by(state) %>%
  summarise(
    max_yield = max(ag_yield, na.rm = TRUE),
    min_yield = min(ag_yield, na.rm = TRUE),
    range_yield = max(ag_yield, na.rm = TRUE) - min(ag_yield, na.rm = TRUE),
    sd_yield = sd(ag_yield, na.rm = TRUE),
    mean_yield = mean(ag_yield, na.rm = TRUE),
    median_yield = median(ag_yield, na.rm = TRUE)
  ) %>%
  knitr::kable()
## Warning in max(ag_yield, na.rm = T): no non-missing arguments to max;
## returning -Inf
## Warning in min(ag_yield, na.rm = T): no non-missing arguments to min;
## returning Inf
## Warning in max(ag_yield, na.rm = T): no non-missing arguments to max;
## returning -Inf
## Warning in min(ag_yield, na.rm = T): no non-missing arguments to min;
## returning Inf
## `summarise()` ungrouping output (override with `.groups` argument)
state max_yield min_yield range_yield sd_yield mean_yield median_yield
baja california -Inf Inf -Inf NA NaN NA
baja california sur 13.11 2.98 10.13 2.4665390 7.940286 7.980
campeche 16.23 3.54 12.69 3.1033450 11.527143 12.060
chiapas 16.45 6.36 10.09 2.7139152 9.633714 8.430
colima 18.93 6.50 12.43 3.4242340 12.265429 12.870
durango 6.50 2.14 4.36 1.1974801 4.440571 4.320
guanajuato 8.58 4.04 4.54 1.3982515 7.222500 7.480
guerrero 21.80 10.14 11.66 2.4896751 12.723429 11.640
hidalgo 13.52 4.00 9.52 1.9300048 7.066061 7.170
jalisco 14.59 7.03 7.56 1.5088998 9.834571 9.500
mexico 15.93 5.49 10.44 1.9393346 8.536000 8.390
michoacan 9.47 5.09 4.38 0.9597775 6.655143 6.380
morelos 20.04 9.57 10.47 2.4818776 13.991143 14.230
nayarit 53.97 6.70 47.27 7.5669918 12.153714 10.990
oaxaca 37.48 7.34 30.14 5.5883251 12.836000 11.400
puebla 12.23 5.33 6.90 1.5156813 8.591351 8.230
queretaro 19.64 2.45 17.19 2.6846601 7.150540 6.700
quintana roo 10.00 2.23 7.77 2.8161105 5.307333 4.590
san luis potosi 12.72 3.47 9.25 1.9822870 8.588286 8.250
sinaloa 14.99 4.02 10.97 2.7130059 10.136000 10.120
sonora 22.00 1.00 21.00 5.6936563 13.683143 14.390
tabasco 20.68 4.90 15.78 3.1584875 7.416250 6.055
tamaulipas 15.24 2.21 13.03 2.7185624 6.505758 5.700
veracruz 10.13 3.54 6.59 1.5197955 7.102571 7.360
yucatan 24.39 9.20 15.19 2.7960165 13.474054 13.300
zacatecas 15.42 2.53 12.89 3.5636149 7.518333 6.625
# One box per state showing the spread of ag_yield across years.
ggplot(mango, aes(x = state, y = ag_yield)) +
  geom_boxplot() +
  labs(x = "State", y = "Yield (tonnes/ha)") +
  # Vertical state names so the labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5))
## Warning: Removed 134 rows containing non-finite values (stat_boxplot).

# Number of years with a non-missing ag_yield value, per state.
number_obs <- summarise(
  group_by(mango, state),
  obs = sum(!is.na(ag_yield))
)
## `summarise()` ungrouping output (override with `.groups` argument)
# States with more than 34 non-missing years.
mango_complete <- filter(number_obs, obs > 34)
mango_complete
## # A tibble: 20 x 2
##    state                 obs
##    <chr>               <int>
##  1 baja california sur    35
##  2 campeche               35
##  3 chiapas                35
##  4 colima                 35
##  5 durango                35
##  6 guerrero               35
##  7 jalisco                35
##  8 mexico                 35
##  9 michoacan              35
## 10 morelos                35
## 11 nayarit                35
## 12 oaxaca                 35
## 13 puebla                 37
## 14 queretaro              37
## 15 san luis potosi        35
## 16 sinaloa                35
## 17 sonora                 35
## 18 veracruz               35
## 19 yucatan                37
## 20 zacatecas              36
# Yield over time, one facet per state with its own y scale.
mango_ts <- ggplot(mango, aes(x = year, y = ag_yield)) +
  geom_line() +
  # Red frame around the whole panel of every state in mango_complete.
  geom_rect(
    data = subset(mango, state %in% c(mango_complete$state)),
    fill = NA, colour = "red",
    xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf
  ) +
  facet_wrap(~state, scales = "free_y", ncol = 5) +
  #facet_wrap(~state, ncol=5)
  labs(
    title = "Mango Yields 1980 - 2016",
    x = "Years",
    y = "Yield (tonnes/ha)"
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1))
mango_ts
## Warning: Removed 37 rows containing missing values (geom_path).

Area

# Per-state descriptive statistics of ag_harv, ignoring missing years.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
mango %>%
  group_by(state) %>%
  summarise(
    max_area = max(ag_harv, na.rm = TRUE),
    min_area = min(ag_harv, na.rm = TRUE),
    range_area = max(ag_harv, na.rm = TRUE) - min(ag_harv, na.rm = TRUE),
    sd_area = sd(ag_harv, na.rm = TRUE),
    mean_area = mean(ag_harv, na.rm = TRUE),
    median_area = median(ag_harv, na.rm = TRUE)
  ) %>%
  knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
state max_area min_area range_area sd_area mean_area median_area
baja california 0.00 0.00 0.00 NA 0.00000 0.000
baja california sur 1058.50 194.00 864.50 280.88033 482.87432 343.000
campeche 2746.00 1008.00 1738.00 656.95773 1917.71714 2172.500
chiapas 32840.57 4720.00 28120.57 8545.53501 14094.29543 15534.000
colima 5357.00 1476.00 3881.00 931.14604 3809.70556 3837.625
durango 418.00 11.00 407.00 134.83013 197.88571 120.000
guanajuato 50.00 0.00 50.00 18.14632 28.20000 33.000
guerrero 24738.40 3429.00 21309.40 6195.62129 16828.66486 16993.000
hidalgo 285.00 0.00 285.00 50.84878 96.08824 102.000
jalisco 7786.84 3444.00 4342.84 867.18085 5225.99086 5135.000
mexico 668.00 222.00 446.00 118.41836 490.45270 523.500
michoacan 22520.24 2053.00 20467.24 6375.98865 14987.50086 18080.000
morelos 955.00 350.93 604.07 174.28874 577.96297 565.000
nayarit 25032.41 6307.00 18725.41 5785.05831 16079.22571 17795.000
oaxaca 18193.00 5663.00 12530.00 3382.99136 14564.89371 14800.000
puebla 227.00 23.00 204.00 52.74027 119.24324 122.000
queretaro 154.00 12.00 142.00 30.09507 73.18919 68.000
quintana roo 52.00 5.00 47.00 15.03266 24.53333 22.000
san luis potosi 1528.00 125.00 1403.00 260.78664 467.67568 412.000
sinaloa 31603.79 1158.00 30445.79 8395.77506 16643.79343 15073.000
sonora 289.00 0.00 289.00 48.82309 39.97297 29.000
tabasco 1450.00 163.00 1287.00 306.82520 512.90625 432.500
tamaulipas 2024.00 0.00 2024.00 390.58291 963.97278 1076.000
veracruz 38765.00 17771.21 20993.79 6352.42564 26433.37200 26769.500
yucatan 1052.00 96.00 956.00 217.58591 392.02054 376.200
zacatecas 92.00 4.00 88.00 16.94997 39.11111 38.000
# One box per state showing the spread of ag_harv across years.
mango %>%
  ggplot(aes(state, ag_harv)) +
  geom_boxplot() +
  # ag_harv is harvested area, so the axis carries the same label as the
  # harvested-area time series; the previous "Area (tonnes)" named a mass unit.
  ylab("Area harvested (ha)") +
  xlab("State") +
  # Vertical state names so the labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5))
## Warning: Removed 116 rows containing non-finite values (stat_boxplot).

# Number of years with a non-missing ag_harv value, per state.
number_obs <- summarise(
  group_by(mango, state),
  obs = sum(!is.na(ag_harv))
)
## `summarise()` ungrouping output (override with `.groups` argument)
# States with more than 34 non-missing years.
mango_complete <- filter(number_obs, obs > 34)
mango_complete
## # A tibble: 21 x 2
##    state                 obs
##    <chr>               <int>
##  1 baja california sur    37
##  2 campeche               35
##  3 chiapas                35
##  4 colima                 36
##  5 durango                35
##  6 guerrero               35
##  7 jalisco                35
##  8 mexico                 37
##  9 michoacan              35
## 10 morelos                37
## # … with 11 more rows
# Harvested area over time, one facet per state with its own y scale.
mango_ts <- ggplot(mango, aes(x = year, y = ag_harv)) +
  geom_line() +
  # Red frame around the whole panel of every state in mango_complete.
  geom_rect(
    data = subset(mango, state %in% c(mango_complete$state)),
    fill = NA, colour = "red",
    xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf
  ) +
  facet_wrap(~state, scales = "free_y", ncol = 5) +
  #facet_wrap(~state, ncol=5)
  labs(
    title = "Mango - Area Harvested (ha) 1980 - 2016",
    x = "Years",
    y = "Area harvested (ha)"
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1))
mango_ts
## Warning: Removed 5 rows containing missing values (geom_path).

Yields ~ Time

Mango yields ~ temperature at the national level

# Scatter of ag_yield against mean_tmax over all rows of mango_tmax, with a
# fitted straight line and its confidence band.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
yt <- mango_tmax %>%
  ggplot(aes(mean_tmax, ag_yield)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  ylab("Yield (tonnes/ha)") +
  xlab("Temperature (ºC) ") +
  ggtitle("Mango Yields ~ Max Temperature")
yt
## Warning: Removed 787 rows containing non-finite values (stat_smooth).
## Warning: Removed 787 rows containing missing values (geom_point).

# Scatter of ag_yield against mean_tmin over all rows of mango_tmin, with a
# fitted straight line and its confidence band.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
yt <- mango_tmin %>%
  ggplot(aes(mean_tmin, ag_yield)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  ylab("Yield (tonnes/ha)") +
  xlab("Temperature (ºC) ") +
  ggtitle("Mango Yields ~ Min Temperature")
yt
## Warning: Removed 791 rows containing non-finite values (stat_smooth).
## Warning: Removed 791 rows containing missing values (geom_point).

# Yield against mean_tmax for five selected states, one facet per state, each
# with its own y scale and its own fitted straight line.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
mango_lm <- mango_tmax %>%
  filter(state %in% c("baja california sur", "durango", "nayarit", "sinaloa", "sonora")) %>%
  ggplot(aes(mean_tmax, ag_yield)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  #scale_y_continuous(trans = 'log10')+
  ylab("Yield (tonnes/ha)") +
  xlab("Temperature (ºC) ") +
  # Only one title is set; an earlier ggtitle("mango Yields") in this chain was
  # always replaced by this one.
  ggtitle("Mango Yields ~ Max Temperature") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1)) +
  facet_wrap(~state, scales = "free_y")
mango_lm
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).

# Yield against mean_tmin for five selected states, one facet per state, each
# with its own y scale and its own fitted straight line.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
mango_lm <- mango_tmin %>%
  filter(state %in% c("baja california sur", "durango", "nayarit", "sinaloa", "sonora")) %>%
  ggplot(aes(mean_tmin, ag_yield)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  ylab("Yield (tonnes/ha)") +
  xlab("Temperature (ºC)") +
  #scale_y_continuous(trans = 'log10')+
  ggtitle("Mango Yields ~ Min Temperature") +
  facet_wrap(~state, scales = "free_y")
mango_lm
## Warning: Removed 14 rows containing non-finite values (stat_smooth).
## Warning: Removed 14 rows containing missing values (geom_point).